Convolution Nets for MNIST

Deep learning models can take quite a bit of time to run, particularly if a GPU isn't used.

In the interest of time, you could sample a subset of observations (e.g. $1000$) that are a particular digit of your choice (e.g. $6$) and $1000$ observations that aren't that particular digit (i.e. $\neq 6$).

We will build a model using that sample and see how it performs on the test dataset.



In [ ]:

    
# Environment setup. These variables are assigned before the Keras imports
# further down, so the statement order of this cell is deliberately kept.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
# NOTE(review): an empty CUDA_VISIBLE_DEVICES presumably hides every GPU so
# that the notebook runs on CPU -- confirm this is still intended.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
#os.environ['THEANO_FLAGS'] = "device=gpu2"

# Import the required libraries
import numpy as np
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1338)

from keras.datasets import mnist
from keras.models import load_model



In [ ]:

    
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten



In [ ]:

    
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D



In [ ]:

    
from keras.utils import np_utils
from keras.optimizers import SGD

Loading Data



In [ ]:

    
# Fetch MNIST and unpack it into its (images, labels) train and test pairs.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Data Preparation

Very Important:

When dealing with images & convolutions, it is paramount to handle image_data_format properly



In [ ]:

    
# Each image is img_rows x img_cols pixels.
img_rows = img_cols = 28
'''
if K.image_data_format() == 'channels_first':
    shape_ord = (1, img_rows, img_cols)
else:  # channel_last
    shape_ord = (img_rows, img_cols, 1)
'''
# The backend-dependent selection above sits inside a string literal and never
# runs; the channels-first layout (channel axis before rows/cols) is used
# unconditionally.
shape_ord = (1,) + (img_rows, img_cols)

Preprocess and Normalise Data



In [ ]:

    
# Give every image the `shape_ord` layout and cast the pixels to float32.
X_train = X_train.reshape((len(X_train),) + shape_ord).astype('float32')
X_test = X_test.reshape((len(X_test),) + shape_ord).astype('float32')

# Scale the pixel values in place by dividing by 255.
for pixels in (X_train, X_test):
    pixels /= 255



In [ ]:

    
# Turn the integer class labels into their binary categorical form,
# with one column per class.
nb_classes = 10
y_train, y_test = (np_utils.to_categorical(labels, nb_classes)
                   for labels in (y_train, y_test))

A simple CNN



In [ ]:

    
# -- Hyperparameters shared by the networks defined in this notebook

# Convolution layers: number of filters and kernel size
nb_filters = 32
nb_conv = 3

# Max-pooling window size
nb_pool = 2

# Training schedule
batch_size = 30000
nb_epoch = 100  # please increase if you have a GPU

# Stochastic gradient descent configured with Nesterov momentum and decay
sgd = SGD(lr=0.1, momentum=0.9, decay=1e-6, nesterov=True)

Step 1: Model Definition



In [ ]:

    
# Baseline network: a single convolution layer feeding a softmax classifier.
model = Sequential()

# The kernel size is passed as one (rows, cols) tuple. Two separate integers
# are the Keras 1 calling convention; under the Keras 2 signature
# Conv2D(filters, kernel_size, strides, ...) the second integer would land in
# `strides` instead of describing the kernel.
# Note: the very first layer **must** always specify the input_shape.
model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                 input_shape=shape_ord))
model.add(Activation('relu'))

# Flatten the feature maps and classify into nb_classes categories.
model.add(Flatten())
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

Step 2: Compile



In [ ]:

    
# Pass the configured `sgd` optimizer object. The string alias 'sgd' would make
# Keras build a default SGD optimizer and silently ignore the learning rate,
# decay and Nesterov momentum chosen when `sgd` was created.
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

Step 3: Fit



In [ ]:

    
# Train or load! you choose!!
# Set TRAIN_BASE_MODEL to True to train from scratch and save the result;
# leave it False to load the previously saved model from disk instead.
TRAIN_BASE_MODEL = False

if TRAIN_BASE_MODEL:
    # `hist` (the training history) is only defined on this path.
    hist = model.fit(X_train, y_train, batch_size=batch_size,
                     epochs=nb_epoch, verbose=1,
                     validation_data=(X_test, y_test))
    model.save('example_MNIST_CNN_base.h5')
else:
    model = load_model('example_MNIST_CNN_base.h5')
model.summary()



In [ ]:

    
import matplotlib.pyplot as plt
%matplotlib inline

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.legend(['Training', 'Validation'])

plt.figure()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(hist.history['acc'])
plt.plot(hist.history['val_acc'])
plt.legend(['Training', 'Validation'], loc='lower right')

Step 4: Evaluate



In [ ]:

    
# Show the names of the values the model reports.
available_metrics = model.metrics_names
print('Available Metrics in Model: {}'.format(available_metrics))



In [ ]:

    
# Evaluate on the test split; the two returned values are unpacked as
# loss and accuracy and then reported.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = test_metrics
for label, value in (('Test Loss:', loss), ('Test Accuracy:', accuracy)):
    print(label, value)

Let's plot our model Predictions!



In [ ]:

    
import matplotlib.pyplot as plt

%matplotlib inline



In [ ]:

    
# Number of test images to preview. Named `n_preview` rather than `slice` so
# the builtin `slice` is not shadowed for the rest of the session.
n_preview = 15
# argmax over the last axis picks the highest-scoring class per image.
predicted = model.predict(X_test[:n_preview]).argmax(-1)

plt.figure(figsize=(16, 8))
for i, label in enumerate(predicted):
    plt.subplot(1, n_preview, i + 1)
    # Index 0 selects the first entry of the axis after the sample axis
    # (the channel axis in the channels-first layout).
    plt.imshow(X_test[i, 0], interpolation='nearest')
    # Overlay the predicted class in the corner of each image.
    plt.text(0, 0, label, color='black',
             bbox=dict(facecolor='white', alpha=1))
    plt.axis('off')

Adding more Dense Layers



In [ ]:

    
# Baseline network plus one extra 128-unit hidden Dense layer.
model = Sequential()
# Kernel size as a (rows, cols) tuple: under the Keras 2 signature
# Conv2D(filters, kernel_size, strides, ...) a second bare integer would be
# read as `strides`.
model.add(Conv2D(nb_filters, (nb_conv, nb_conv), input_shape=shape_ord))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# Use the hyperparameters configured on `sgd` rather than the string alias
# 'sgd', which builds a default optimizer. A fresh copy is created so that
# optimizer state is not shared with any other model.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD.from_config(sgd.get_config()),
              metrics=['accuracy'])



In [ ]:

    
# Ah, another path to choose, Train or load!!
# Set TRAIN_DENSE_MODEL to True to train from scratch and save the result;
# leave it False to load the previously saved model from disk instead.
TRAIN_DENSE_MODEL = False

if TRAIN_DENSE_MODEL:
    # `hist` (the training history) is only defined on this path.
    hist = model.fit(X_train, y_train, batch_size=batch_size,
                     epochs=nb_epoch, verbose=1,
                     validation_data=(X_test, y_test))
    model.save('example_MNIST_CNN_more_dense.h5')
else:
    model = load_model('example_MNIST_CNN_more_dense.h5')
model.summary()



In [ ]:

    
# Evaluate on the test split and report the two returned values.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
score, accuracy = test_metrics
for label, value in (('Test score:', score), ('Test accuracy:', accuracy)):
    print(label, value)

Adding more Convolution Layers



In [ ]:

    
# Deeper network: two convolution layers, max pooling and dropout, followed by
# a 128-unit hidden Dense layer and the softmax classifier.
model = Sequential()
# Kernel sizes are passed as (rows, cols) tuples: under the Keras 2 signature
# Conv2D(filters, kernel_size, strides, ...) a second bare integer would be
# read as `strides`.
model.add(Conv2D(nb_filters, (nb_conv, nb_conv), input_shape=shape_ord))
model.add(Activation('relu'))
model.add(Conv2D(nb_filters, (nb_conv, nb_conv)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))



In [ ]:

    
# Use the hyperparameters configured on `sgd` rather than the string alias
# 'sgd', which builds a default optimizer. A fresh copy is created so that
# optimizer state is not shared with any other model.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD.from_config(sgd.get_config()),
              metrics=['accuracy'])

# Set TRAIN_CONV_MODEL to True to train from scratch and save the result;
# leave it False to load the previously saved model from disk instead.
TRAIN_CONV_MODEL = False

if TRAIN_CONV_MODEL:
    # Integer division: `batch_size / 2` is a float in Python 3, and the batch
    # size has to be an integer.
    hist = model.fit(X_train, y_train, batch_size=batch_size // 2,
                     epochs=nb_epoch, verbose=1,
                     validation_data=(X_test, y_test))
    model.save('example_MNIST_CNN_more_conv.h5')
else:
    model = load_model('example_MNIST_CNN_more_conv.h5')
model.summary()



In [ ]:

    
# Evaluate on the test split and report the two returned values.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
score, accuracy = test_metrics
for label, value in (('Test score:', score), ('Test accuracy:', accuracy)):
    print(label, value)

Exercise

The code above has been wrapped into the function below.

Change some of the hyperparameters and see what happens.



In [ ]:

    
# Function for constructing the convolution neural network
# Feel free to add parameters, if you want

def build_model():
    """Build, train and evaluate the two-convolution-layer network.

    The architecture is two convolution layers, max pooling and dropout,
    followed by a 128-unit hidden Dense layer, dropout and a softmax
    classifier. All settings are read from module-level names at call time:
    ``nb_filters``, ``nb_conv``, ``nb_pool``, ``nb_classes``, ``shape_ord``,
    ``batch_size``, ``nb_epoch`` and ``sgd``, together with the data arrays
    ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    The test score and accuracy are printed after training.

    Returns:
        The trained ``Sequential`` model, so it can be inspected or reused.
    """
    model = Sequential()
    # Kernel sizes are passed as (rows, cols) tuples. Two bare integers
    # combined with an explicit `padding=` keyword are ambiguous between the
    # Keras 1 (nb_row, nb_col) and Keras 2 (kernel_size, strides) conventions.
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv),
                     padding='valid',
                     input_shape=shape_ord))
    model.add(Activation('relu'))
    model.add(Conv2D(nb_filters, (nb_conv, nb_conv)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    # Use the hyperparameters configured on `sgd` rather than the string alias
    # 'sgd', which builds a default optimizer. A fresh copy per call keeps
    # optimizer state from leaking between successive builds.
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD.from_config(sgd.get_config()),
                  metrics=['accuracy'])

    model.fit(X_train, y_train, batch_size=batch_size,
              epochs=nb_epoch, verbose=1,
              validation_data=(X_test, y_test))

    # Evaluating the model on the test data
    score, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print('Test score:', score)
    print('Test accuracy:', accuracy)

    return model